import io
import os
import sys
import types
import numpy as np
import pandas as pd
import altair as alt
import dropbox
import mysql.connector
from datetime import date
from datetime import datetime, timedelta
import logging
%run MakeNBImportAvailable.ipynb
import GetMySQL
import knmi_gn_001 as gethdd
importing Jupyter notebook from GetMySQL.ipynb importing Jupyter notebook from knmi_gn_001.ipynb
log_format = "%(asctime)s::%(levelname)s::%(name)s::"\
"%(filename)s::%(lineno)d::%(message)s"
logging.basicConfig(filename='Energy.log', level='DEBUG', format=log_format)
color_gn = ['#D35400',
'#7D3C98',
'steelblue',
'chartreuse',
'#F4D03F',
"#1696d2",
"#d2d2d2",
"#000000",
"#fdbf11",
"#ec008b",
"#55b748",
"#5c5859",
"#db2b27"]
Doel:
Geef overzicht van energieverbruik dit kalenderjaar en dit factuur jaar tov andere jaren
Plaats gasverbruik in relatie tot meetwaarden van het weer
Werkwijze:
Database is minuut data, het duurt te lang om alle detaildata uit database te retrieven
Maak gebruik van file op schijf
Raadpleeg de database voor ontbrekende dagen en overschrijf daarbij de laatste 2 dagen (nr is input)
def leesdatabase(nrdays,latest):
"""
"""
# overwrite last nr of days
ndays = nrdays
poi = (latest- timedelta( days=ndays)).strftime('%Y-%m-%d')
poi = ''.join([poi,':'])
poi = GetMySQL.SetPOI(poi)
props = GetMySQL.readsettings()
df_g = GetMySQL.getSQL(poi,"item0054",props,"gas")
df_g = GetMySQL.RemoveReadingErrors_r1(df_g)
df_EH = GetMySQL.getSQL(poi,"item0052",props,"EH")
df_EH = GetMySQL.RemoveReadingErrors_r1(df_EH)
df_EL = GetMySQL.getSQL(poi,"item0051",props,"EL")
df_EL = GetMySQL.RemoveReadingErrors_r1(df_EL)
df = pd.concat([df_g,df_EH,df_EL])
return df
def berekenverbruik(df):
df = df.reset_index()
df['verbruik'] = df[['counter','variable']].groupby(['variable']).diff().shift(-1)
df = df[~df['verbruik'].isnull()]
df = df.groupby(['variable','dy'])['verbruik'].sum().reset_index()
df['dy'] = pd.to_datetime(df['dy'])
return df
def updateschijfdata(df_hist,df,fn):
# verwijder dezelfde data punten uit de historische df
df_hist = df_hist[~df_hist['dy'].isin(df['dy'])]
df = df_hist.append(df)
df = df.sort_values(['variable','dy'],ascending=True)
dowrite=df.to_csv(fn)
logging.info("data written to %s",fn)
return df
def add_electricity(df):
""" electricity comes with 2 categories: EH + EL
this functions combines these two and adds this data as one category to the df
"""
electr = df[(df['variable']!="gas")&(df['variable']!="electricieit")]
electr = electr[['verbruik','variable','dy']].groupby('dy').sum().reset_index()
electr['variable']='electriciteit'
df = pd.concat([df,electr])
logging.info(df.head())
logging.info("electricity of EH and EL combined in one category Electricity and added to the df")
return df
def add_calendarinfo(df,firstmonth):
df['yr'] = df['dy'].dt.year
df['month'] =df['dy'].dt.month
df['day']=df['dy'].dt.day
df['year'] = 2024
df['normaldate'] =pd.to_datetime(df[['year','month', 'day']],format='%Y-%m-%d')
logging.info("calendar info added to df")
df['cumul'] = df[['variable','dy','verbruik','yr']].groupby(['variable','yr']).cumsum()
# Factuur jaar FJ
# corrigeer zodat eerste factuurmaand nr 1 wordt
df['FJ_mnth'] = df['month']-(firstmonth-1)
# corrigeer number maand in het jaar naar nummer maand in factuur jaar
# als eerste maand 10 is, dan maand september wordt 12 : 9+(12-(10-1))
df.loc[df['month']<firstmonth,['FJ_mnth']] = df.loc[df['month']<firstmonth,'month'].apply(lambda x: x+ (12-(firstmonth-1)))
df['FJ_yr'] = df['yr']
df.loc[df['month']>=firstmonth,['FJ_yr']] = df.loc[df['month']>=firstmonth,'yr'].apply(lambda x: x+1)
df['FJ_cumul'] = df[['variable','FJ_yr','dy','verbruik']].groupby(['variable','FJ_yr']).cumsum()
df['subtractyears'] = 0
df.loc[df['month']>=firstmonth,['subtractyears']] = 1
df['FJ_normaldate'] = df.apply(lambda x: x['normaldate']-pd.offsets.DateOffset(years=x['subtractyears']),axis=1)
# Mathijs (2020-08-01)
# Covid (thuiswerken 2020-03-01)
# Vloerisolatie (2022-02-25)
df['period']='1.pre-Covid'
df.loc[df['dy']>='2020-03-01','period']='2.Covid'
df.loc[df['dy']>='2021-09-01','period']='3.Mathijs'
df.loc[df['dy']>='2022-02-26','period']='4.VloerIsolatie'
df.loc[df['dy']>='2022-06-12','period']='5.ExtraDakIsolatie'
df.loc[df['dy']>='2022-12-02','period']='6.CVnaar50dC'
df['daysago'] = df['dy'].max() - df['dy']
df['daysago'] = df['daysago'].dt.days
return df
def plot_kalenderjaar(df):
# jaaroverzichten
yrdata = df[['variable','yr','verbruik']].groupby(['variable','yr']).sum().reset_index()
source = yrdata[yrdata['yr']!=2018]
chart1=alt.Chart(source.reset_index(),title="Energieverbruik per jaar").mark_bar().encode(
x='yr:O',
y=alt.Y("verbruik:Q"),
# color='yr:N',
column='variable',
color=alt.Color('yr:N',scale=alt.Scale(range=color_gn))
).interactive().properties(width=150,height=350)
display(chart1)
chart1.save('kalenderjaar.png')
yrdata = pd.pivot_table(yrdata[yrdata['yr']!=2018], values = 'verbruik' , index = 'variable',columns = 'yr')
# yrdata = yrdata.round(0)
yrdata = yrdata.style.format(precision=0, na_rep='MISSING')
display(yrdata)
def plotmedium(df,mediumstr):
crrntyr = df['yr'].max()
source1 = df[(df['variable']==mediumstr)&(df['yr']!=crrntyr)]
chart1 = alt.Chart(source1,
title=mediumstr).mark_line(strokeWidth=1,point=True).encode(
x=alt.X('month:N',axis=alt.Axis(title="Maand")),
y=alt.Y("verbruik:Q",axis=alt.Axis(title="Verbruik")),
color=alt.Color('yr:N',scale=alt.Scale(range=color_gn))).interactive().properties(width=300,height=250)
source2 = df[(df['variable']==mediumstr)&(df['yr']==crrntyr)]
chart2 = alt.Chart(source2,
title=mediumstr).mark_line(strokeWidth=3,point=True).encode(
x=alt.X('month:N',axis=alt.Axis(title="Maand")),
y=alt.Y("verbruik:Q",axis=alt.Axis(title="Verbruik")),
color=alt.Color('yr:N',scale=alt.Scale(range=color_gn))).interactive().properties(width=300,height=250)
return (chart1+chart2)
def plot_months(df):
df_month = df.loc[df['yr']!=2018,['variable','yr','month','verbruik']].groupby(['variable','yr','month']).sum().reset_index()
a1 = plotmedium(df_month,"gas")
a2 = plotmedium(df_month,"electriciteit")
a3 = plotmedium(df_month,"EL")
a4 = plotmedium(df_month,"EH")
chart_all=alt.vconcat(a1|a2,a3|a4)
display(chart_all)
def plotcumuls(df,mediumstr):
crrntyr = df['yr'].max()
source1 = df[(df['variable']==mediumstr)&(df['yr']!=crrntyr)]
chart1 = alt.Chart(source1,title=mediumstr).mark_line(
strokeWidth=1).encode(
x=alt.X('normaldate:T',
axis=alt.Axis(title="datum")),
y=alt.Y("cumul:Q",
axis=alt.Axis(title="Verbruik")),
# color='yr:N'
color=alt.Color('yr:N',scale=alt.Scale(range=color_gn))
).properties(width=300,height=250).interactive()
source2 = df[(df['variable']==mediumstr)&(df['yr']==crrntyr)]
chart2 = alt.Chart(source2,title=mediumstr).mark_line(
strokeWidth=3).encode(
x=alt.X('normaldate:T'),
y=alt.Y("cumul:Q"),
color='yr:N')
return (chart1+chart2)
def plotcumul(df):
a1 = plotcumuls(df[df['yr']!=2018],"electriciteit")
a2 = plotcumuls(df[df['yr']!=2018],"gas")
a3 = plotcumuls(df[df['yr']!=2018],"EL")
a4 = plotcumuls(df[df['yr']!=2018],"EH")
chart_all=alt.vconcat(a1|a2,a3|a4)
display(chart_all)
logging.info("read data from file")
fn = "dagelijks_energieverbuik.csv"
df_hist = pd.read_csv(fn, parse_dates=['dy'],
index_col = ['Unnamed: 0'])
latest = df_hist['dy'].max()
logging.info("last data point is: %s",str(latest))
nrdays = 10
df = leesdatabase(nrdays,latest)
POI is : {'start': '2022-12-22', 'end': '2023-01-02'}
df.tail(10)
| counter | variable | dy | |
|---|---|---|---|
| datetime | |||
| 2023-01-01 12:20:00 | 7596.610 | EL | 2023-01-01 |
| 2023-01-01 12:21:00 | 7596.614 | EL | 2023-01-01 |
| 2023-01-01 12:22:00 | 7596.614 | EL | 2023-01-01 |
| 2023-01-01 12:23:00 | 7596.625 | EL | 2023-01-01 |
| 2023-01-01 12:24:00 | 7596.629 | EL | 2023-01-01 |
| 2023-01-01 12:25:00 | 7596.629 | EL | 2023-01-01 |
| 2023-01-01 12:26:00 | 7596.635 | EL | 2023-01-01 |
| 2023-01-01 12:27:00 | 7596.640 | EL | 2023-01-01 |
| 2023-01-01 12:28:00 | 7596.644 | EL | 2023-01-01 |
| 2023-01-01 12:29:00 | 7596.649 | EL | 2023-01-01 |
# add latest data from database to file
df = berekenverbruik(df)
# werk de file bij met laatste data
df = updateschijfdata(df_hist,df,fn)
# check dat variabele electriciteit nog niet bestaat
df[df['variable']=='electriciteit']
| variable | dy | verbruik | yr | month | day | year | normaldate | cumul | FJ_mnth | FJ_yr | FJ_cumul | subtractyears | FJ_normaldate | period | daysago |
|---|
# before this step there should not be any line with variable electriciteit, maar kan per ongeluk in file geslopen zijn, dan handmatig eruit mikken en opnieuw script draaien
df = add_electricity(df)
df[df['variable']=='electriciteit']
| variable | dy | verbruik | yr | month | day | year | normaldate | cumul | FJ_mnth | FJ_yr | FJ_cumul | subtractyears | FJ_normaldate | period | daysago | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | electriciteit | 2018-11-25 | 6.604 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | electriciteit | 2018-11-26 | 11.526 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | electriciteit | 2018-11-27 | 9.133 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | electriciteit | 2018-11-28 | 9.126 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | electriciteit | 2018-11-29 | 8.582 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1458 | electriciteit | 2022-12-28 | 9.164 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1459 | electriciteit | 2022-12-29 | 10.247 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1460 | electriciteit | 2022-12-30 | 12.159 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1461 | electriciteit | 2022-12-31 | 16.195 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1462 | electriciteit | 2023-01-01 | 5.069 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1463 rows × 16 columns
# introduce firstmonth of period when not comparing calendar year data
firstmonth = 6
df = add_calendarinfo(df,firstmonth)
plot_kalenderjaar(df)
INFO:tornado.access:200 GET / (::1) 6.20ms INFO:tornado.access:200 GET /vega.js (::1) 5.45ms INFO:tornado.access:200 GET /vega-lite.js (::1) 4.55ms INFO:tornado.access:200 GET /vega-embed.js (::1) 4.71ms
| yr | 2019 | 2020 | 2021 | 2022 | 2023 |
|---|---|---|---|---|---|
| variable | |||||
| EH | 1957 | 2266 | 2444 | 2134 | 0 |
| EL | 1610 | 1629 | 1657 | 1935 | 5 |
| electriciteit | 3567 | 3894 | 4101 | 4069 | 5 |
| gas | 1433 | 1318 | 1739 | 1182 | 2 |
plot_months(df)
plotcumul(df)
def plotFJcumulmedium(df,mediumstr):
crrntyr = df['FJ_yr'].max()
source1 = df[(df['variable']==mediumstr)&(df['FJ_yr']!=crrntyr)]
chart1 = alt.Chart(source1,title=mediumstr).mark_line(
strokeWidth=1).encode(
x=alt.X('FJ_normaldate:T',
axis=alt.Axis(title="datum")),
y=alt.Y("FJ_cumul:Q",
axis=alt.Axis(title="Verbruik")),
# color='FJ_yr:N'
color=alt.Color('FJ_yr:N',scale=alt.Scale(range=color_gn))
).properties(width=300,height=250).interactive()
source2 = df[(df['variable']==mediumstr)&(df['FJ_yr']==crrntyr)]
chart2 = alt.Chart(source2,title=mediumstr).mark_line(
strokeWidth=3).encode(
x=alt.X('FJ_normaldate:T'),
y=alt.Y("FJ_cumul:Q"),
color='FJ_yr:N')
return (chart1+chart2)
def plotFJcumuls(df):
a1 = plotFJcumulmedium(df,"electriciteit")
a2 = plotFJcumulmedium(df,"gas")
a3 = plotFJcumulmedium(df,"EL")
a4 = plotFJcumulmedium(df,"EH")
chart_all=alt.vconcat(a1|a2,a3|a4)
display(chart_all)
plotFJcumuls(df[:-3])
def plotbars(df,mediumstr):
chart1 = alt.Chart(df[df['variable']==mediumstr],
title=mediumstr).mark_bar(size=20).encode(
x=alt.X('dy:T',
axis=alt.Axis(title="datum")),
y=alt.Y("verbruik:Q",
axis=alt.Axis(title="Verbruik")),
# color='variable:N'
color=alt.Color('variable:N',scale=alt.Scale(range=color_gn))
).properties(width=300,height=250)
return (chart1)
def plot_lastdays(df, nrdays):
datefrom = df['dy'].max()-timedelta(days=nrdays)
df = df[df['dy']>datefrom]
a1 = plotbars(df,"electriciteit")
a2 = plotbars(df,"gas")
a3 = plotbars(df,"EL")
a4 = plotbars(df,"EH")
chart_all=alt.vconcat(a1|a2,a3|a4)
display(chart_all)
tbl = pd.pivot_table(df,index='dy',values='verbruik',columns='variable')
display(tbl)
plot_lastdays(df,10)
| variable | EH | EL | electriciteit | gas |
|---|---|---|---|---|
| dy | ||||
| 2022-12-23 | 11.655 | 3.383 | 15.038 | 1.587 |
| 2022-12-24 | 0.000 | 16.716 | 16.716 | 0.646 |
| 2022-12-25 | 0.000 | 10.612 | 10.612 | 5.373 |
| 2022-12-26 | 0.000 | 9.752 | 9.752 | 0.811 |
| 2022-12-27 | 12.016 | 1.827 | 13.843 | 7.765 |
| 2022-12-28 | 7.544 | 1.620 | 9.164 | 4.691 |
| 2022-12-29 | 6.699 | 3.548 | 10.247 | 4.849 |
| 2022-12-30 | 8.575 | 3.584 | 12.159 | 5.763 |
| 2022-12-31 | 0.000 | 16.195 | 16.195 | 2.404 |
| 2023-01-01 | 0.000 | 5.069 | 5.069 | 1.507 |
def get_weatherdata(firstmonth):
df = gethdd.retrieveHDD()
df['month'] = df.index.month
df['yr'] = df.index.year
df['FJ_mnth'] = df['month']-(firstmonth-1)
# df.loc[df['month']<10,['FJ_mnth']] = df.loc[df['month']<10,'month'].apply(lambda x: x+3)
# als eerste maand 10 is, dan maand september wordt 12 : 9+(12-(10-1))
df.loc[df['month']<firstmonth,['FJ_mnth']] = df.loc[df['month']<firstmonth,'month'].apply(lambda x: x+ (12-(firstmonth-1)))
df['FJ_yr'] = df['yr']
df.loc[df['month']>=firstmonth,['FJ_yr']] = df.loc[df['month']>=firstmonth,'yr'].apply(lambda x: x+1)
df['FJ_cumul_hdd'] = df[['HDD','FJ_yr']].groupby(['FJ_yr']).cumsum()
df['Teff'] = df['TG']*1/10 - 2/3*df['FG']*1/10
df['day']=df.index.day
df['year'] = 2024
df['normaldate'] =pd.to_datetime(df[['year','month', 'day']],format='%Y-%m-%d')
df['subtractyears'] = 0
df.loc[df['month']>=firstmonth,['subtractyears']] = 1
df['FJ_normaldate'] = df.apply(lambda x: x['normaldate']-pd.offsets.DateOffset(years=x['subtractyears']),axis=1)
return df
df_hdd = get_weatherdata(firstmonth)
cols = ['HDD','FJ_cumul_hdd','TG','Teff']
df_hdd = df_hdd[df['dy'].min():]
read the csv file with knmi data knmi_data.csv
last data point in csv file: 2022-12-30 00:00:00
The current time is : 2023-01-01 12:29:44.252130
The number of days since last datapoint is : 2
Update noodzakelijk
data saved to file : knmi_data.csv
KNMI last datapoint is : 2022-12-30 00:00:00
Current time : 2023-01-01 12:29:44.795630
Elapsed time since [hours] : 60
Nr rows to add : 2
SQ Q DR TG FG RH HDD dow
datetime
2022-12-28 0.0 65.0 137.0 85.0 57.0 47.0 9.300000 Wednesday
2022-12-29 18.0 157.0 43.0 92.0 63.0 31.0 9.000000 Thursday
2022-12-30 0.0 137.0 50.0 83.0 53.0 16.0 9.233333 Friday
2022-12-31 0.0 0.0 0.0 0.0 0.0 0.0 14.000000 Saturday
2023-01-01 0.0 0.0 0.0 0.0 0.0 0.0 14.000000 Sunday
df_hdd.tail()
| SQ | Q | DR | TG | FG | RH | HDD | dow | month | yr | FJ_mnth | FJ_yr | FJ_cumul_hdd | Teff | day | year | normaldate | subtractyears | FJ_normaldate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| datetime | |||||||||||||||||||
| 2022-12-28 | 0.0 | 65.0 | 137.0 | 85.0 | 57.0 | 47.0 | 9.300000 | Wednesday | 12 | 2022 | 7 | 2023 | 765.033333 | 4.700000 | 28 | 2024 | 2024-12-28 | 1 | 2023-12-28 |
| 2022-12-29 | 18.0 | 157.0 | 43.0 | 92.0 | 63.0 | 31.0 | 9.000000 | Thursday | 12 | 2022 | 7 | 2023 | 774.033333 | 5.000000 | 29 | 2024 | 2024-12-29 | 1 | 2023-12-29 |
| 2022-12-30 | 0.0 | 137.0 | 50.0 | 83.0 | 53.0 | 16.0 | 9.233333 | Friday | 12 | 2022 | 7 | 2023 | 783.266667 | 4.766667 | 30 | 2024 | 2024-12-30 | 1 | 2023-12-30 |
| 2022-12-31 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 14.000000 | Saturday | 12 | 2022 | 7 | 2023 | 797.266667 | 0.000000 | 31 | 2024 | 2024-12-31 | 1 | 2023-12-31 |
| 2023-01-01 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 14.000000 | Sunday | 1 | 2023 | 8 | 2023 | 811.266667 | 0.000000 | 1 | 2024 | 2024-01-01 | 0 | 2024-01-01 |
df = df.merge(df_hdd[cols],left_on="dy",right_on="datetime",how='right')
df.tail()
| variable | dy | verbruik | yr | month | day | year | normaldate | cumul | FJ_mnth | FJ_yr | FJ_cumul | subtractyears | FJ_normaldate | period | daysago | HDD | FJ_cumul_hdd | TG | Teff | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 5859 | electriciteit | 2022-12-31 | 16.195 | 2022.0 | 12.0 | 31.0 | 2024.0 | 2024-12-31 | 4069.299 | 7.0 | 2023.0 | 2262.881 | 1.0 | 2023-12-31 | 6.CVnaar50dC | 1.0 | 14.0 | 797.266667 | 0.0 | 0.0 |
| 5860 | EH | 2023-01-01 | 0.000 | 2023.0 | 1.0 | 1.0 | 2024.0 | 2024-01-01 | 0.000 | 8.0 | 2023.0 | 1084.988 | 0.0 | 2024-01-01 | 6.CVnaar50dC | 0.0 | 14.0 | 811.266667 | 0.0 | 0.0 |
| 5861 | EL | 2023-01-01 | 5.069 | 2023.0 | 1.0 | 1.0 | 2024.0 | 2024-01-01 | 5.069 | 8.0 | 2023.0 | 1182.962 | 0.0 | 2024-01-01 | 6.CVnaar50dC | 0.0 | 14.0 | 811.266667 | 0.0 | 0.0 |
| 5862 | gas | 2023-01-01 | 1.507 | 2023.0 | 1.0 | 1.0 | 2024.0 | 2024-01-01 | 1.507 | 8.0 | 2023.0 | 405.367 | 0.0 | 2024-01-01 | 6.CVnaar50dC | 0.0 | 14.0 | 811.266667 | 0.0 | 0.0 |
| 5863 | electriciteit | 2023-01-01 | 5.069 | 2023.0 | 1.0 | 1.0 | 2024.0 | 2024-01-01 | 5.069 | 8.0 | 2023.0 | 2267.950 | 0.0 | 2024-01-01 | 6.CVnaar50dC | 0.0 | 14.0 | 811.266667 | 0.0 | 0.0 |
source = df[df['verbruik']!=0]
chart1 = alt.Chart(source[(source['variable']=='gas')&
(source['period']!='6.CVNaar50dC')]).mark_point(clip=True).encode(
x=alt.X('HDD:Q',axis=alt.Axis(title='HDD'),scale=alt.Scale(domain=(0, 26))),
y=alt.Y('verbruik',axis=alt.Axis(title='verbruik'),scale=alt.Scale(domain=(0, 20))),
# color='period:N',
color=alt.Color('period:N',scale=alt.Scale(range=color_gn)),
size='period'
).properties(width=600,height=400)
chart2 = alt.Chart(source[(source['variable']=='gas')&
(source['period']=='6.CVNaar50dC')]).mark_point(
clip=True,filled=True,size=200).encode(
x=alt.X('HDD:Q',axis=alt.Axis(title='HDD'),scale=alt.Scale(domain=(0, 26))),
y=alt.Y('verbruik',axis=alt.Axis(title='verbruik'),scale=alt.Scale(domain=(0, 20))),
# color='period:N',
color=alt.Color('period:N',scale=alt.Scale(range=color_gn)),
size='period'
).properties(width=600,height=400)
chart1+chart2
/home/gert/env/lib/python3.9/site-packages/altair/utils/core.py:185: UserWarning: I don't know how to infer vegalite type from 'empty'. Defaulting to nominal. warnings.warn(
source = df[(df['variable']=='gas')&(df['daysago']>5)]
source = source[source['verbruik']!=0]
chart1 = alt.Chart(source).mark_point(clip=True).encode(
x=alt.X('HDD:Q',axis=alt.Axis(title='HDD'),scale=alt.Scale(domain=(0, 26))),
y=alt.Y('verbruik',axis=alt.Axis(title='verbruik'),scale=alt.Scale(domain=(0, 20))),
# fill='period:N',
fill=alt.Fill('period:N',scale=alt.Scale(range=color_gn)),
size=alt.Size('period',scale=alt.Scale(range=[50,200]))
).interactive().properties(width=600,height=400)
# points = base.mark_point(clip=True,size=100).encode(
# fill = alt.Fill('daysago:Q', scale = alt.Scale(scheme='plasma')),
# shape = alt.Shape('dow:N',sort=cats),
# size = alt.Size('daysago', scale=alt.Scale(range=[250, 10]))
# )
chart2 = alt.Chart(df[(df['variable']=='gas')&(df['daysago']<=5)&(df['daysago']>0)]
).mark_point(clip=True,size=300,fill='red').encode(
x=alt.X('HDD:Q'),
y=alt.Y('verbruik'),
shape='daysago:N'
).properties(width=600,height=400)
text = chart2.mark_text(
align='left',
baseline='middle',
dx=12
).encode(
text='daysago'
)
chart1 + chart2 +text
source = df[(df['variable']=='gas')&(df['daysago']>5)]
source = source[source['verbruik']!=0]
color_gn = ['#D35400','#7D3C98', 'steelblue', 'chartreuse', '#F4D03F','green']
chart1 = alt.Chart(source).mark_point(clip=True).encode(
x=alt.X('Teff:Q',axis=alt.Axis(title='Eff Temp'),scale=alt.Scale(domain=(-15, 26))),
y=alt.Y('verbruik',axis=alt.Axis(title='verbruik'),scale=alt.Scale(domain=(0, 20))),
# fill='period:N',
# fill=alt.Fill('period:N',scale=alt.Scale(scheme='dark2')),
fill=alt.Fill('period:N',scale=alt.Scale(range=color_gn)),
size=alt.Size('period',scale=alt.Scale(range=[50,200]))
).interactive().properties(width=600,height=400)
# points = base.mark_point(clip=True,size=100).encode(
# fill = alt.Fill('daysago:Q', scale = alt.Scale(scheme='plasma')),
# shape = alt.Shape('dow:N',sort=cats),
# size = alt.Size('daysago', scale=alt.Scale(range=[250, 10]))
# )
chart2 = alt.Chart(df[(df['variable']=='gas')&(df['daysago']<=5)&(df['daysago']>0)]
).mark_point(clip=True,size=300,fill='red').encode(
x=alt.X('Teff:Q'),
y=alt.Y('verbruik'),
shape='daysago:N'
).properties(width=600,height=400)
text = chart2.mark_text(
align='left',
baseline='middle',
dx=12
).encode(
text='daysago'
)
chart1 + chart2 +text
df_hdd.tail()
| SQ | Q | DR | TG | FG | RH | HDD | dow | month | yr | FJ_mnth | FJ_yr | FJ_cumul_hdd | Teff | day | year | normaldate | subtractyears | FJ_normaldate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| datetime | |||||||||||||||||||
| 2022-12-28 | 0.0 | 65.0 | 137.0 | 85.0 | 57.0 | 47.0 | 9.300000 | Wednesday | 12 | 2022 | 7 | 2023 | 765.033333 | 4.700000 | 28 | 2024 | 2024-12-28 | 1 | 2023-12-28 |
| 2022-12-29 | 18.0 | 157.0 | 43.0 | 92.0 | 63.0 | 31.0 | 9.000000 | Thursday | 12 | 2022 | 7 | 2023 | 774.033333 | 5.000000 | 29 | 2024 | 2024-12-29 | 1 | 2023-12-29 |
| 2022-12-30 | 0.0 | 137.0 | 50.0 | 83.0 | 53.0 | 16.0 | 9.233333 | Friday | 12 | 2022 | 7 | 2023 | 783.266667 | 4.766667 | 30 | 2024 | 2024-12-30 | 1 | 2023-12-30 |
| 2022-12-31 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 14.000000 | Saturday | 12 | 2022 | 7 | 2023 | 797.266667 | 0.000000 | 31 | 2024 | 2024-12-31 | 1 | 2023-12-31 |
| 2023-01-01 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 14.000000 | Sunday | 1 | 2023 | 8 | 2023 | 811.266667 | 0.000000 | 1 | 2024 | 2024-01-01 | 0 | 2024-01-01 |
def plotFJcumulHDD(df):
crrntyr = df['FJ_yr'].max()
source1 = df[(df['FJ_yr']!=crrntyr)]
chart1 = alt.Chart(source1,title="HDD").mark_line(
strokeWidth=1).encode(
x=alt.X('FJ_normaldate:T',
axis=alt.Axis(title="datum")),
y=alt.Y("FJ_cumul_hdd:Q",
axis=alt.Axis(title="Verbruik")),
color='FJ_yr:N'
).properties(width=300,height=250).interactive()
source2 = df[(df['FJ_yr']==crrntyr)]
chart2 = alt.Chart(source2).mark_line(
strokeWidth=3).encode(
x=alt.X('FJ_normaldate:T'),
y=alt.Y("FJ_cumul_hdd:Q"),
color='FJ_yr:N')
return (chart1+chart2)
today = date.today()
df[df['dy']!=today.strftime("%Y-%m-%d")].tail()
| variable | dy | verbruik | yr | month | day | year | normaldate | cumul | FJ_mnth | FJ_yr | FJ_cumul | subtractyears | FJ_normaldate | period | daysago | HDD | FJ_cumul_hdd | TG | Teff | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 5855 | electriciteit | 2022-12-30 | 12.159 | 2022.0 | 12.0 | 30.0 | 2024.0 | 2024-12-30 | 4053.104 | 7.0 | 2023.0 | 2246.686 | 1.0 | 2023-12-30 | 6.CVnaar50dC | 2.0 | 9.233333 | 783.266667 | 83.0 | 4.766667 |
| 5856 | EH | 2022-12-31 | 0.000 | 2022.0 | 12.0 | 31.0 | 2024.0 | 2024-12-31 | 2133.919 | 7.0 | 2023.0 | 1084.988 | 1.0 | 2023-12-31 | 6.CVnaar50dC | 1.0 | 14.000000 | 797.266667 | 0.0 | 0.000000 |
| 5857 | EL | 2022-12-31 | 16.195 | 2022.0 | 12.0 | 31.0 | 2024.0 | 2024-12-31 | 1935.380 | 7.0 | 2023.0 | 1177.893 | 1.0 | 2023-12-31 | 6.CVnaar50dC | 1.0 | 14.000000 | 797.266667 | 0.0 | 0.000000 |
| 5858 | gas | 2022-12-31 | 2.404 | 2022.0 | 12.0 | 31.0 | 2024.0 | 2024-12-31 | 1182.297 | 7.0 | 2023.0 | 403.860 | 1.0 | 2023-12-31 | 6.CVnaar50dC | 1.0 | 14.000000 | 797.266667 | 0.0 | 0.000000 |
| 5859 | electriciteit | 2022-12-31 | 16.195 | 2022.0 | 12.0 | 31.0 | 2024.0 | 2024-12-31 | 4069.299 | 7.0 | 2023.0 | 2262.881 | 1.0 | 2023-12-31 | 6.CVnaar50dC | 1.0 | 14.000000 | 797.266667 | 0.0 | 0.000000 |
display(plotFJcumulHDD(df_hdd[df_hdd.index.date!=datetime.now().date()])|plotFJcumulmedium(df[df['dy']!=today.strftime("%Y-%m-%d")],"gas"))
df.columns
Index(['variable', 'dy', 'verbruik', 'yr', 'month', 'day', 'year',
'normaldate', 'cumul', 'FJ_mnth', 'FJ_yr', 'FJ_cumul', 'subtractyears',
'FJ_normaldate', 'period', 'daysago', 'HDD', 'FJ_cumul_hdd', 'TG',
'Teff'],
dtype='object')
t2 = df.iloc[-1]['dy']
t1 = df[(df['yr']==2022)&(df['FJ_mnth']==1)&(df['day']==1)]['dy'].tolist()[0]
t2-t1
Timedelta('214 days 00:00:00')
df.tail()
| variable | dy | verbruik | yr | month | day | year | normaldate | cumul | FJ_mnth | FJ_yr | FJ_cumul | subtractyears | FJ_normaldate | period | daysago | HDD | FJ_cumul_hdd | TG | Teff | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 5859 | electriciteit | 2022-12-31 | 16.195 | 2022.0 | 12.0 | 31.0 | 2024.0 | 2024-12-31 | 4069.299 | 7.0 | 2023.0 | 2262.881 | 1.0 | 2023-12-31 | 6.CVnaar50dC | 1.0 | 14.0 | 797.266667 | 0.0 | 0.0 |
| 5860 | EH | 2023-01-01 | 0.000 | 2023.0 | 1.0 | 1.0 | 2024.0 | 2024-01-01 | 0.000 | 8.0 | 2023.0 | 1084.988 | 0.0 | 2024-01-01 | 6.CVnaar50dC | 0.0 | 14.0 | 811.266667 | 0.0 | 0.0 |
| 5861 | EL | 2023-01-01 | 5.069 | 2023.0 | 1.0 | 1.0 | 2024.0 | 2024-01-01 | 5.069 | 8.0 | 2023.0 | 1182.962 | 0.0 | 2024-01-01 | 6.CVnaar50dC | 0.0 | 14.0 | 811.266667 | 0.0 | 0.0 |
| 5862 | gas | 2023-01-01 | 1.507 | 2023.0 | 1.0 | 1.0 | 2024.0 | 2024-01-01 | 1.507 | 8.0 | 2023.0 | 405.367 | 0.0 | 2024-01-01 | 6.CVnaar50dC | 0.0 | 14.0 | 811.266667 | 0.0 | 0.0 |
| 5863 | electriciteit | 2023-01-01 | 5.069 | 2023.0 | 1.0 | 1.0 | 2024.0 | 2024-01-01 | 5.069 | 8.0 | 2023.0 | 2267.950 | 0.0 | 2024-01-01 | 6.CVnaar50dC | 0.0 | 14.0 | 811.266667 | 0.0 | 0.0 |
mnth_hdd = df[df['variable']=='gas'].groupby(['period','month']).sum().reset_index()
mnth_hdd.tail(10)
| period | month | verbruik | yr | day | year | cumul | FJ_mnth | FJ_yr | FJ_cumul | subtractyears | daysago | HDD | FJ_cumul_hdd | TG | Teff | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 33 | 4.VloerIsolatie | 6.0 | 5.861 | 22242.0 | 66.0 | 22264.0 | 8598.696 | 11.0 | 22253.0 | 35.889 | 11.0 | 2299.0 | 11.966667 | 94.766667 | 1694.0 | 145.466667 |
| 34 | 5.ExtraDakIsolatie | 6.0 | 8.891 | 38418.0 | 399.0 | 38456.0 | 14994.259 | 19.0 | 38437.0 | 203.956 | 19.0 | 3686.0 | 5.033333 | 301.166667 | 3372.0 | 310.533333 |
| 35 | 5.ExtraDakIsolatie | 7.0 | 9.154 | 62682.0 | 496.0 | 62744.0 | 24772.736 | 62.0 | 62713.0 | 641.189 | 31.0 | 5239.0 | 0.933333 | 544.733333 | 5590.0 | 512.400000 |
| 36 | 5.ExtraDakIsolatie | 8.0 | 10.317 | 62682.0 | 496.0 | 62744.0 | 25042.529 | 93.0 | 62713.0 | 910.982 | 31.0 | 4278.0 | 0.300000 | 557.133333 | 6050.0 | 566.333333 |
| 37 | 5.ExtraDakIsolatie | 9.0 | 16.933 | 60660.0 | 465.0 | 60720.0 | 24567.655 | 120.0 | 60690.0 | 1214.545 | 30.0 | 3225.0 | 70.800000 | 1106.933333 | 4252.0 | 385.133333 |
| 38 | 5.ExtraDakIsolatie | 10.0 | 26.348 | 62682.0 | 496.0 | 62744.0 | 26118.411 | 155.0 | 62713.0 | 1986.864 | 31.0 | 2387.0 | 85.833333 | 4394.833333 | 4024.0 | 350.866667 |
| 39 | 5.ExtraDakIsolatie | 11.0 | 108.855 | 60660.0 | 465.0 | 60720.0 | 26902.098 | 180.0 | 60690.0 | 3548.988 | 30.0 | 1395.0 | 231.966667 | 8374.233333 | 2493.0 | 188.033333 |
| 40 | 5.ExtraDakIsolatie | 12.0 | 6.738 | 2022.0 | 1.0 | 2024.0 | 971.534 | 7.0 | 2023.0 | 193.097 | 1.0 | 31.0 | 9.966667 | 416.800000 | 53.0 | 4.033333 |
| 41 | 6.CVnaar50dC | 1.0 | 1.507 | 2023.0 | 1.0 | 2024.0 | 1.507 | 8.0 | 2023.0 | 405.367 | 0.0 | 0.0 | 14.000000 | 811.266667 | 0.0 | 0.000000 |
| 42 | 6.CVnaar50dC | 12.0 | 210.763 | 60660.0 | 495.0 | 60720.0 | 32848.724 | 210.0 | 60690.0 | 9495.614 | 30.0 | 465.0 | 380.466667 | 18907.933333 | 948.0 | 39.533333 |
mnth_hdd[mnth_hdd['HDD']<10]
| period | month | verbruik | yr | day | year | cumul | FJ_mnth | FJ_yr | FJ_cumul | subtractyears | daysago | HDD | FJ_cumul_hdd | TG | Teff | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 5 | 1.pre-Covid | 6.0 | 19.191 | 60570.0 | 465.0 | 60720.0 | 24705.710 | 30.0 | 60600.0 | 308.030 | 30.0 | 38865.0 | 9.033333 | 194.300000 | 5518.0 | 497.666667 |
| 6 | 1.pre-Covid | 7.0 | 11.948 | 62589.0 | 496.0 | 62744.0 | 26030.977 | 62.0 | 62620.0 | 820.041 | 31.0 | 39215.0 | 8.933333 | 490.533333 | 5745.0 | 525.300000 |
| 7 | 1.pre-Covid | 8.0 | 18.476 | 62589.0 | 496.0 | 62744.0 | 26476.593 | 93.0 | 62620.0 | 1265.657 | 31.0 | 38254.0 | 2.966667 | 606.233333 | 5633.0 | 512.700000 |
| 34 | 5.ExtraDakIsolatie | 6.0 | 8.891 | 38418.0 | 399.0 | 38456.0 | 14994.259 | 19.0 | 38437.0 | 203.956 | 19.0 | 3686.0 | 5.033333 | 301.166667 | 3372.0 | 310.533333 |
| 35 | 5.ExtraDakIsolatie | 7.0 | 9.154 | 62682.0 | 496.0 | 62744.0 | 24772.736 | 62.0 | 62713.0 | 641.189 | 31.0 | 5239.0 | 0.933333 | 544.733333 | 5590.0 | 512.400000 |
| 36 | 5.ExtraDakIsolatie | 8.0 | 10.317 | 62682.0 | 496.0 | 62744.0 | 25042.529 | 93.0 | 62713.0 | 910.982 | 31.0 | 4278.0 | 0.300000 | 557.133333 | 6050.0 | 566.333333 |
| 40 | 5.ExtraDakIsolatie | 12.0 | 6.738 | 2022.0 | 1.0 | 2024.0 | 971.534 | 7.0 | 2023.0 | 193.097 | 1.0 | 31.0 | 9.966667 | 416.800000 | 53.0 | 4.033333 |
chart1 = alt.Chart(mnth_hdd[mnth_hdd['period']!='6.CVnaar50dC'],title="HDD_month").mark_point(
size=200).encode(
x=alt.X('HDD:Q',
axis=alt.Axis(title="HDD")),
y=alt.Y("verbruik:Q",
axis=alt.Axis(title="Verbruik")),
fill=alt.Fill('period:N',scale=alt.Scale(range=color_gn)),
shape = 'month:N'
).properties(width=600,height=400).interactive()
chart2=alt.Chart(mnth_hdd[mnth_hdd['period']=='6.CVnaar50dC']).mark_point(
size=500).encode(
x=alt.X('HDD:Q',
axis=alt.Axis(title="HDD")),
y=alt.Y("verbruik:Q",
axis=alt.Axis(title="Verbruik")),
fill='period:N',
shape = 'month:N'
)
chart = chart1 + chart2
mnth_hdd[mnth_hdd['period']=='6.CVnaar50dC']
| period | month | verbruik | yr | day | year | cumul | FJ_mnth | FJ_yr | FJ_cumul | subtractyears | daysago | HDD | FJ_cumul_hdd | TG | Teff | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 41 | 6.CVnaar50dC | 1.0 | 1.507 | 2023.0 | 1.0 | 2024.0 | 1.507 | 8.0 | 2023.0 | 405.367 | 0.0 | 0.0 | 14.000000 | 811.266667 | 0.0 | 0.000000 |
| 42 | 6.CVnaar50dC | 12.0 | 210.763 | 60660.0 | 495.0 | 60720.0 | 32848.724 | 210.0 | 60690.0 | 9495.614 | 30.0 | 465.0 | 380.466667 | 18907.933333 | 948.0 | 39.533333 |
chart
df['wk'] = df['dy'].dt.isocalendar().week
df.tail()
| variable | dy | verbruik | yr | month | day | year | normaldate | cumul | FJ_mnth | ... | FJ_cumul | subtractyears | FJ_normaldate | period | daysago | HDD | FJ_cumul_hdd | TG | Teff | wk | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 5859 | electriciteit | 2022-12-31 | 16.195 | 2022.0 | 12.0 | 31.0 | 2024.0 | 2024-12-31 | 4069.299 | 7.0 | ... | 2262.881 | 1.0 | 2023-12-31 | 6.CVnaar50dC | 1.0 | 14.0 | 797.266667 | 0.0 | 0.0 | 52 |
| 5860 | EH | 2023-01-01 | 0.000 | 2023.0 | 1.0 | 1.0 | 2024.0 | 2024-01-01 | 0.000 | 8.0 | ... | 1084.988 | 0.0 | 2024-01-01 | 6.CVnaar50dC | 0.0 | 14.0 | 811.266667 | 0.0 | 0.0 | 52 |
| 5861 | EL | 2023-01-01 | 5.069 | 2023.0 | 1.0 | 1.0 | 2024.0 | 2024-01-01 | 5.069 | 8.0 | ... | 1182.962 | 0.0 | 2024-01-01 | 6.CVnaar50dC | 0.0 | 14.0 | 811.266667 | 0.0 | 0.0 | 52 |
| 5862 | gas | 2023-01-01 | 1.507 | 2023.0 | 1.0 | 1.0 | 2024.0 | 2024-01-01 | 1.507 | 8.0 | ... | 405.367 | 0.0 | 2024-01-01 | 6.CVnaar50dC | 0.0 | 14.0 | 811.266667 | 0.0 | 0.0 | 52 |
| 5863 | electriciteit | 2023-01-01 | 5.069 | 2023.0 | 1.0 | 1.0 | 2024.0 | 2024-01-01 | 5.069 | 8.0 | ... | 2267.950 | 0.0 | 2024-01-01 | 6.CVnaar50dC | 0.0 | 14.0 | 811.266667 | 0.0 | 0.0 | 52 |
5 rows × 21 columns
wk_hdd = df[df['variable']=='gas'].groupby(['period','yr','wk']).sum().reset_index()
wk_hdd.tail()
| period | yr | wk | verbruik | month | day | year | cumul | FJ_mnth | FJ_yr | FJ_cumul | subtractyears | daysago | HDD | FJ_cumul_hdd | TG | Teff | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 213 | 6.CVnaar50dC | 2022.0 | 49 | 53.998 | 84.0 | 56.0 | 14168.0 | 7177.828 | 49.0 | 14161.0 | 1728.769 | 7.0 | 168.0 | 93.100000 | 3580.400000 | 139.0 | 4.900000 |
| 214 | 6.CVnaar50dC | 2022.0 | 50 | 75.517 | 84.0 | 105.0 | 14168.0 | 7661.788 | 49.0 | 14161.0 | 2212.729 | 7.0 | 119.0 | 124.933333 | 4385.800000 | -194.0 | -26.933333 |
| 215 | 6.CVnaar50dC | 2022.0 | 51 | 30.288 | 84.0 | 154.0 | 14168.0 | 8019.927 | 49.0 | 14161.0 | 2570.868 | 7.0 | 70.0 | 53.633333 | 4982.866667 | 581.0 | 44.366667 |
| 216 | 6.CVnaar50dC | 2022.0 | 52 | 26.283 | 72.0 | 171.0 | 12144.0 | 7027.016 | 42.0 | 12138.0 | 2356.394 | 6.0 | 21.0 | 63.533333 | 4619.333333 | 378.0 | 20.466667 |
| 217 | 6.CVnaar50dC | 2023.0 | 52 | 1.507 | 1.0 | 1.0 | 2024.0 | 1.507 | 8.0 | 2023.0 | 405.367 | 0.0 | 0.0 | 14.000000 | 811.266667 | 0.0 | 0.000000 |
chart1 = alt.Chart(wk_hdd[wk_hdd['period']!='6.CVnaar50dC'],title="HDD_week").mark_point(
size=100).encode(
x=alt.X('HDD:Q',
axis=alt.Axis(title="HDD")),
y=alt.Y("verbruik:Q",
axis=alt.Axis(title="Verbruik"),scale=alt.Scale(domain=[0, 120])),
fill=alt.Fill('period:N',scale=alt.Scale(range=color_gn)),
shape = 'wk:N'
).properties(width=600,height=400).interactive()
chart2 = alt.Chart(wk_hdd[wk_hdd['period']=='6.CVnaar50dC']).mark_point(
size=800).encode(
x=alt.X('HDD:Q',
axis=alt.Axis(title="HDD")),
y=alt.Y("verbruik:Q",
axis=alt.Axis(title="Verbruik")),
fill=alt.Fill('period:N',scale=alt.Scale(range=color_gn)),
shape = 'wk:N'
)
chart = chart1 + chart2
chart
chart1 = alt.Chart(wk_hdd[wk_hdd['period']=='5.ExtraDakIsolatie'],title="HDD_week").mark_point(
size=100).encode(
x=alt.X('HDD:Q',
axis=alt.Axis(title="HDD")),
y=alt.Y("verbruik:Q",
axis=alt.Axis(title="Verbruik"),scale=alt.Scale(domain=[0, 120])),
fill=alt.Fill('period:N',scale=alt.Scale(range=color_gn)),
shape = 'wk:N'
).properties(width=600,height=400).interactive()
chart2 = alt.Chart(wk_hdd[wk_hdd['period']=='6.CVnaar50dC']).mark_point(
size=800).encode(
x=alt.X('HDD:Q',
axis=alt.Axis(title="HDD")),
y=alt.Y("verbruik:Q",
axis=alt.Axis(title="Verbruik")),
fill=alt.Fill('period:N',scale=alt.Scale(range=color_gn)),
shape = 'wk:N'
)
chart = chart1 + chart2
chart
wk_hdd[wk_hdd['period']=='6.CVnaar50dC']
| period | yr | wk | verbruik | month | day | year | cumul | FJ_mnth | FJ_yr | FJ_cumul | subtractyears | daysago | HDD | FJ_cumul_hdd | TG | Teff | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 212 | 6.CVnaar50dC | 2022.0 | 48 | 24.677 | 36.0 | 9.0 | 6072.0 | 2962.165 | 21.0 | 6069.0 | 626.854 | 3.0 | 87.0 | 45.266667 | 1339.533333 | 44.0 | -3.266667 |
| 213 | 6.CVnaar50dC | 2022.0 | 49 | 53.998 | 84.0 | 56.0 | 14168.0 | 7177.828 | 49.0 | 14161.0 | 1728.769 | 7.0 | 168.0 | 93.100000 | 3580.400000 | 139.0 | 4.900000 |
| 214 | 6.CVnaar50dC | 2022.0 | 50 | 75.517 | 84.0 | 105.0 | 14168.0 | 7661.788 | 49.0 | 14161.0 | 2212.729 | 7.0 | 119.0 | 124.933333 | 4385.800000 | -194.0 | -26.933333 |
| 215 | 6.CVnaar50dC | 2022.0 | 51 | 30.288 | 84.0 | 154.0 | 14168.0 | 8019.927 | 49.0 | 14161.0 | 2570.868 | 7.0 | 70.0 | 53.633333 | 4982.866667 | 581.0 | 44.366667 |
| 216 | 6.CVnaar50dC | 2022.0 | 52 | 26.283 | 72.0 | 171.0 | 12144.0 | 7027.016 | 42.0 | 12138.0 | 2356.394 | 6.0 | 21.0 | 63.533333 | 4619.333333 | 378.0 | 20.466667 |
| 217 | 6.CVnaar50dC | 2023.0 | 52 | 1.507 | 1.0 | 1.0 | 2024.0 | 1.507 | 8.0 | 2023.0 | 405.367 | 0.0 | 0.0 | 14.000000 | 811.266667 | 0.0 | 0.000000 |
import numpy as np
from sklearn.linear_model import LinearRegression
model = LinearRegression()
x = np.array(wk_hdd[wk_hdd['period']=='6.CVnaar50dC']['HDD']).reshape(-1,1)
y = np.array(wk_hdd[wk_hdd['period']=='6.CVnaar50dC']['verbruik']).reshape(-1,1)
x.reshape((-1, 1))
array([[ 45.26666667],
[ 93.1 ],
[124.93333333],
[ 53.63333333],
[ 63.53333333],
[ 14. ]])
model = LinearRegression().fit(x, y)
r_sq = model.score(x, y)
r_sq
0.9771084153839216
print(f"intercept: {model.intercept_}")
intercept: [-7.91339632]
print(f"slope: {model.coef_}")
slope: [[0.65848499]]
mask = (df['variable']=='gas')&(df['daysago']>0)&(df['daysago']<7)
lastweek = df[mask].copy()
lastweek['cumul_hdd_wk'] = lastweek['HDD'].cumsum()
lastweek['cumul_verbruik_wk'] = lastweek['verbruik'].cumsum()
lastweek
| variable | dy | verbruik | yr | month | day | year | normaldate | cumul | FJ_mnth | ... | FJ_normaldate | period | daysago | HDD | FJ_cumul_hdd | TG | Teff | wk | cumul_hdd_wk | cumul_verbruik_wk | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 5838 | gas | 2022-12-26 | 0.811 | 2022.0 | 12.0 | 26.0 | 2024.0 | 2024-12-26 | 1156.825 | 7.0 | ... | 2023-12-26 | 6.CVnaar50dC | 6.0 | 10.266667 | 744.000000 | 66.0 | 3.733333 | 52 | 10.266667 | 0.811 |
| 5842 | gas | 2022-12-27 | 7.765 | 2022.0 | 12.0 | 27.0 | 2024.0 | 2024-12-27 | 1164.590 | 7.0 | ... | 2023-12-27 | 6.CVnaar50dC | 5.0 | 11.733333 | 755.733333 | 52.0 | 2.266667 | 52 | 22.000000 | 8.576 |
| 5846 | gas | 2022-12-28 | 4.691 | 2022.0 | 12.0 | 28.0 | 2024.0 | 2024-12-28 | 1169.281 | 7.0 | ... | 2023-12-28 | 6.CVnaar50dC | 4.0 | 9.300000 | 765.033333 | 85.0 | 4.700000 | 52 | 31.300000 | 13.267 |
| 5850 | gas | 2022-12-29 | 4.849 | 2022.0 | 12.0 | 29.0 | 2024.0 | 2024-12-29 | 1174.130 | 7.0 | ... | 2023-12-29 | 6.CVnaar50dC | 3.0 | 9.000000 | 774.033333 | 92.0 | 5.000000 | 52 | 40.300000 | 18.116 |
| 5854 | gas | 2022-12-30 | 5.763 | 2022.0 | 12.0 | 30.0 | 2024.0 | 2024-12-30 | 1179.893 | 7.0 | ... | 2023-12-30 | 6.CVnaar50dC | 2.0 | 9.233333 | 783.266667 | 83.0 | 4.766667 | 52 | 49.533333 | 23.879 |
| 5858 | gas | 2022-12-31 | 2.404 | 2022.0 | 12.0 | 31.0 | 2024.0 | 2024-12-31 | 1182.297 | 7.0 | ... | 2023-12-31 | 6.CVnaar50dC | 1.0 | 14.000000 | 797.266667 | 0.0 | 0.000000 | 52 | 63.533333 | 26.283 |
6 rows × 23 columns
chart1 = alt.Chart(lastweek,title="Previous 7 days").mark_point(
size=100).encode(
x=alt.X('HDD:Q',
axis=alt.Axis(title="HDD")),
y=alt.Y("verbruik:Q",
axis=alt.Axis(title="Verbruik")),
fill=alt.Fill('daysago:N',scale=alt.Scale(range=color_gn))
).properties(width=350,height=400).interactive()
chart2 = alt.Chart(lastweek,title="Previous 7 days").mark_point(
size=100).encode(
x=alt.X('cumul_hdd_wk:Q',
axis=alt.Axis(title="HDD")),
y=alt.Y("cumul_verbruik_wk:Q",
axis=alt.Axis(title="Verbruik")),
fill=alt.Fill('daysago:N',scale=alt.Scale(range=color_gn))
).properties(width=350,height=400).interactive()
chart1 | chart2